version 16.1
clear all
cd "MYPATH\draft_tab_fig"
adopath + ../ado/

cap log close
pause on 

log using rf_figures.log, replace

preliminaries
foreach PATH in RESULTS TEMP {
	cap mkdir "${`PATH'}\draft_tab_fig"
    if "`PATH'" != "TEMP" cap mkdir "${`PATH'}\draft_tab_fig\figures"
	if "`PATH'" != "TEMP" cap mkdir "${`PATH'}\draft_tab_fig\fig_data"
}

program main
	nt_risk_dist
	key_figures
	export_figures
end


program nt_risk_dist
	use "$DATA\analysis_sample", clear
	local count = 1
	
	gen ln_subt = log(subt)
	
	local bin_size = 1000
	qui gen bin_number = .
	forval bin_max = `bin_size'(`bin_size')20000 {
			local bin_inf = `bin_max' - `bin_size'
			qui replace bin_number = `count' if fetus_risk <= `bin_max' & fetus_risk > `bin_inf'
			local count = `count' + 1
	}
	qui unique bin_number
	local num_bins = r(unique)
	
	local ylab = "0(.1).5"
	local yscl = "0 .5"

	* KUB risk distribution w/ second y-axis for share subt 
	qui sum fetus_risk, d
	local N : dis%8.0fc r(N)
	local sd: dis%8.2fc r(sd)
	local mean: dis%9.2fc r(mean)
	local min: dis%7.0fc r(min)
	local q1: dis%7.0fc r(p25)
	local median: dis%7.0fc r(p50)
	local q3: dis%7.0fc r(p75)
	local max: dis%7.0fc r(max)
	binscatter subt fetus_risk, line(connect) legend(off) name(risk_dist_w_subt) ///
	  xtitle("NT Screening result (estimated risk of chromosomal abnormalities)") ///
	  yscale(r(`yscl') alt) ytitle( , size(small)) ///
	  ylabel(`ylab', format(%3.1f)) ///
	  xlabel(2 "1/2" 4000 "1/4,000" 8000 "1/8,000" 12000 "1/12,000" ///
	    16000 "1/16,000" 20000 "1/20,000", format(%7.0fc)) ///
	  xscale(reverse) ///
	  m(o) mcolor(green) lcolor(green) ///
	  xline(1000) xq(bin_number) ///
	  legend(size(vsmall) on order(- "N         = `N'" ///
													 "Mean      = `mean'" ///
													 "SD        = `sd'" ///
													 "Min       = `min'" ///
													 "Q1        = `q1'" ///
													 "Median    = `median'" ///
													 "Q3        = `q3'" ///
													 "Max       = `max'") ///
	  pos(1) ring(0) region(lwidth(none))) legend(subtitle("NT risk distribution", size(vsmall) just(left)))
	addplot risk_dist_w_subt: hist fetus_risk, percent color(blue%20) yaxis(2) yscale(r(`yscl')) ///
	  ytitle("% of pregnancies (histogram)", orientation(vertical) axis(2) size(small)) ///
	  ytitle("Share received any post-NT testing", orientation(rvertical) axis(1) size(small)) ///
	  ylabel(0(5)40, format(%3.0f) axis(2) nogrid) ylabel(`ylab', format(%3.1f) axis(1) nogrid) ///
	  bin(`num_bins') ///
	  xlabel(2 "1/2" 4000 "1/4,000" 8000 "1/8,000" 12000 "1/12,000" ///
	    16000 "1/16,000" 20000 "1/20,000", format(%7.0fc)) ///
	  xscale(reverse) /// 
	  plotregion(margin(zero)) ///
	  legend(size(vsmall) on order(- "N         = `N'" ///
													 "Mean      = `mean'" ///
													 "SD        = `sd'" ///
													 "Min       = `min'" ///
													 "Q1        = `q1'" ///
													 "Median    = `median'" ///
													 "Q3        = `q3'" ///
													 "Max       = `max'") ///
	  pos(1) ring(0) region(lwidth(none))) legend(subtitle("NT risk distribution", size(vsmall) just(left)))
	  
	* Just risk dist 
	hist fetus_risk, frequency color(blue%50) yscale(r()) name(risk_dist) ///
	  ytitle("", orientation(vertical) size(small)) ///
	  ylabel(, format(%6.0fc)) ///
	  bin(`num_bins') ///
	  xlabel(2 "1/2" 4000 "1/4,000" 8000 "1/8,000" 12000 "1/12,000" ///
	    16000 "1/16,000" 20000 "1/20,000", format(%7.0fc)) ///
	  xscale(reverse) /// 
	  plotregion(margin(zero)) ///
	  legend(size(vsmall) on order(- "N         = `N'" ///
													 "Mean      = `mean'" ///
													 "SD        = `sd'" ///
													 "Min       = `min'" ///
													 "Q1        = `q1'" ///
													 "Median    = `median'" ///
													 "Q3        = `q3'" ///
													 "Max       = `max'") ///
	  pos(1) ring(0) region(lwidth(none))) legend(subtitle("NT risk distribution", size(vsmall) just(left)))
	twoway__histogram_gen fetus_risk, percent bin(`num_bins)') ///
	  generate(risk_hist_height risk_hist_cb, replace)
	preserve 
	keep if !mi(risk_hist_height) | !mi(risk_hist_cb)
	keep risk_hist_cb risk_hist_height
	gsort -risk_hist_cb
	export delimited using "$RESULTS/draft_tab_fig/fig_data/risk_dist.csv", replace
	restore
	
	* Just share subt -- y in log scale 
	preserve 
	collapse (mean) subt, by(bin_number)
	twoway connect subt bin_number, legend(off) name(share_subt_logscale) ///
	  xtitle("NT Screening result (estimated risk of chromosomal abnormalities)") ///
	  yscale(log) ylabel(0 .01 .02 .05 .1 .20 .30 .40) ///
	  ytitle("") ///
	  xlabel(1 "{&ge} 1/1,000" 4 "1/4,000" 8 "1/8,000" 12 "1/12,000" ///
	    16 "1/16,000" 20 "1/20,000", format(%7.0fc)) ///
	  xscale(reverse) ///
	  m(o) mcolor(green) lcolor(green) ///
	  xline(1)
	gsort -bin_number
	export delimited using "$RESULTS/draft_tab_fig/fig_data/share_subt_logscale.csv", replace
	restore 
  
	* KUB risk distribution w/ second y-axis for share subt, x from 1000 to 0
	qui keep if fetus_risk <= 1000
	local count = 1
	local bin_size = 25
	qui drop bin_number
	qui gen bin_number = .
	forval bin_max = `bin_size'(`bin_size')1000 {
			local bin_inf = `bin_max' - `bin_size'
			qui replace bin_number = `count' if fetus_risk <= `bin_max' & fetus_risk > `bin_inf'
			local count = `count' + 1
	}
	qui unique bin_number
	local num_bins = r(unique)
	
	local ylab = "0(.1)1"
	local yscl = "0 1"
	
	qui sum fetus_risk, d
	local N : dis%8.0fc r(N)
	local sd: dis%8.2fc r(sd)
	local mean: dis%9.2fc r(mean)
	local min: dis%7.0fc r(min)
	local q1: dis%7.0fc r(p25)
	local median: dis%7.0fc r(p50)
	local q3: dis%7.0fc r(p75)
	local max: dis%7.0fc r(max)
	
	
	binscatter subt fetus_risk, line(connect) legend(off) name(risk_dist_w_subt_1000) ///
	  xtitle("NT Screening result (estimated risk of chromosomal abnormalities)") ///
	  yscale(r(`yscl') alt) ytitle( , size(small)) ///
	  ylabel(`ylab', format(%3.1f)) ///
	  xlabel(2 "1/50" 200 "1/200" 400 "1/400" 600 "1/600" ///
	    800 "1/800" 1000 "1/1,000", format(%7.0fc)) ///
      xscale(reverse) /// 
	  m(o) mcolor(green) lcolor(green) ///
	  xline(200 51) xq(bin_number) ///
	  legend(size(vsmall) on order(- "N         = `N'" ///
													 "Mean      = `mean'" ///
													 "SD        = `sd'" ///
													 "Min       = `min'" ///
													 "Q1        = `q1'" ///
													 "Median    = `median'" ///
													 "Q3        = `q3'" ///
													 "Max       = `max'") ///
	  pos(11) ring(0) region(lwidth(none))) legend(subtitle("NT risk distribution", size(vsmall) just(left)))
	addplot risk_dist_w_subt_1000: hist fetus_risk, percent color(blue%20) yaxis(2) yscale(r(`yscl')) ///
	  ytitle("% of pregnancies (histogram)", orientation(vertical) axis(2) size(small)) ///
	  ytitle("Share received any post-NT testing", orientation(rvertical) axis(1) size(small)) ///
	  ylabel(0(5)15, format(%3.0f) axis(2) nogrid) ylabel(`ylab', format(%3.1f) axis(1) nogrid) ///
	  bin(`num_bins') ///
	  xlabel(51 "1/51" 200 "1/200" 400 "1/400" 600 "1/600" ///
	    800 "1/800" 1000 "1/1000", format(%7.0fc)) ///
      xscale(reverse) xline(200 51) /// 
	  plotregion(margin(zero)) ///
	  legend(size(vsmall) on order(- "N         = `N'" ///
													 "Mean      = `mean'" ///
													 "SD        = `sd'" ///
													 "Min       = `min'" ///
													 "Q1        = `q1'" ///
													 "Median    = `median'" ///
													 "Q3        = `q3'" ///
													 "Max       = `max'") ///
	  pos(11) ring(0) region(lwidth(none))) legend(subtitle("NT risk distribution", size(vsmall) just(left)))
	
	* Just risk dist, x from 1000 to 0
	hist fetus_risk, frequency color(blue%50) yscale(r()) name(risk_dist_1000) ///
	  ytitle("", orientation(vertical) size(small)) ///
	  ylabel(, format(%6.0fc)) ///
	  bin(`num_bins') ///
	  xlabel(51 "1/51" 200 "1/200" 400 "1/400" 600 "1/600" ///
	    800 "1/800" 1000 "1/1000", format(%7.0fc)) ///
      xscale(reverse) xline(200 51) /// 
	  plotregion(margin(zero)) ///
	  legend(size(vsmall) on order(- "N         = `N'" ///
													 "Mean      = `mean'" ///
													 "SD        = `sd'" ///
													 "Min       = `min'" ///
													 "Q1        = `q1'" ///
													 "Median    = `median'" ///
													 "Q3        = `q3'" ///
													 "Max       = `max'") ///
	  pos(11) ring(0) region(lwidth(none))) legend(subtitle("NT risk distribution", size(vsmall) just(left)))
	twoway__histogram_gen fetus_risk, percent bin(`num_bins)') ///
	  generate(risk_hist_height risk_hist_cb, replace)
	preserve 
	keep if !mi(risk_hist_height) | !mi(risk_hist_cb)
	keep risk_hist_cb risk_hist_height
	gsort -risk_hist_cb
	export delimited using "$RESULTS\draft_tab_fig/fig_data/risk_dist_1000.csv", replace
	restore
	
	* Just share subt, x from 1000 to 0, y log scale
	preserve 
	collapse (mean) subt, by(bin_number)
	qui replace bin_number = bin_number * 25
	twoway connect subt bin_number, legend(off) name(share_subt_1000_logscale) ///
	  xtitle("NT Screening result (estimated risk of chromosomal abnormalities)") ///
	  yscale(log) ytitle( , size(small)) ///
	  ylabel(.03 .05 .10 .20 .40 .80 1, format(%3.2f)) ytitle("") ///
	  xlabel(50 "1/50" 200 "1/200" 400 "1/400" 600 "1/600" ///
	    800 "1/800" 1000 "1/1,000", format(%7.0fc)) ///
      xscale(reverse) xline(200 51) /// 
	  m(o) mcolor(green) lcolor(green)
	
	gsort -bin_number
	export delimited using "$RESULTS/draft_tab_fig/fig_data/share_subt_1000_logscale.csv", replace
	restore 
end


program key_figures
	use "$DATA\analysis_sample", clear
	label var did_nipt "Share got NIPT"
	label var did_invasive "Share got invasive" 
	
	gen post_nipt = 1*(wave == 3)
	keep if wave != 1
	drop if fetus_risk > 1000	
	save "${TEMP}\draft_tab_fig\fig_samp.dta", replace
	
	
	foreach gr in "51_200" "hr_cov" "51_1000" "1000" {
		di "`gr'"
		qui use "${TEMP}\draft_tab_fig\fig_samp.dta", clear 
		qui keep if nipt_`gr' == 1
		if "`gr'" == "51_200" local cov_risk = "[1:200, 1:51]"
		if "`gr'" == "hr_cov" local cov_risk = "[1:200, 1:2]"
		
		qui count 
		local n_preg: di %7.0fc r(N)
		qui count if wave == 2 
		local n_preg2: di %7.0fc r(N)
		qui count if wave == 3 
		local n_preg3: di %7.0fc r(N)
		di "Total: N= `n_preg'"
		di "Wave 2: N = `n_preg2', Wave 3: N = `n_preg3'"

		* Original Figures *
		local lines "51 200"
		local bin_size = 25
		qui gen bin_number = .
		local count = 1
		forval bin_max = `bin_size'(`bin_size')1000 {
			local bin_inf = `bin_max' - `bin_size'
			qui replace bin_number = `count' if fetus_risk <= `bin_max' & fetus_risk > `bin_inf'
			local count = `count' + 1
		}
		preserve
		foreach var in did_nipt {
			local ylab = "0(.1)1"
			local yscl = "0 1"
			local ytext = "1"
			qui unique bin_number
			local num_bins = r(unique)
			if "`var'" == "did_nipt" local ytit = "NIPT take-up (%)"	
			collapse (mean) `var' (count) pregnancy, by(bin_number post_nipt)
			qui replace bin_number = bin_number * 25
			twoway (connect `var' bin_number if post_nipt == 0, m(o) mcolor(green) lcolor(green)) ///
			  (connect `var' bin_number if post_nipt == 1, m(t) mcolor(blue) lcolor(blue)), ///
			  legend(on) name(`var'_o_`gr') ///
			  xtitle("NT Screening result (estimated risk of chromosomal abnormalities)") ///
			  yscale(r(`yscl'))  ytitle( , size(small)) ///
			  ylabel(`ylab', format(%3.2f)) ///
			  xlabel(2 "1/2" 51 "1/51" 200 "1/200" 400 "1/400" ///
				600 "1/600" 800 "1/800" 1000 "1/1000", format(%7.0fc)) ///
			  xscale(reverse) ///
			  legend(order(1 "Prior to any NIPT coverage" 2 "NIPT coverage for risk in `cov_risk'") ///
				pos(6) span row(1) col(2)) ///
			  xline(`lines') ///
			  plotregion(margin(zero)) ///
			  text(`ytext' 25 "High risk", yaxis(1) place(n) size(vsmall)) ///
			  text(`ytext' 125 "Medium risk", yaxis(1) place(n) size(vsmall)) ///
			  text(`ytext' 600 "Low risk", yaxis(1) place(n) size(vsmall))
			
			export delimited using "$RESULTS/draft_tab_fig/fig_data/did_nipt_o_`gr'.csv", replace
		}
		restore
		**************************************************************************
		* Custom Bin Sizes * 
		cap drop bin_number 
		qui gen bin_number = .
		local count = 1
		local bin_size = 25
		forval bin_max = `bin_size'(`bin_size')1000 {
			local bin_inf = `bin_max' - `bin_size'
			qui replace bin_number = `count' if fetus_risk <= `bin_max' & fetus_risk > `bin_inf'
			local count = `count' + 1
		}
		qui gen count_preg = 1
		bys bin_number: egen tot_preg_bin = total(count_preg)
		qui sum count_preg, d
		local n_preg = r(sum)
		
		preserve 
		collapse (mean) did_nipt did_invasive subt termination_clean no_live healthy chroma tot_preg_bin (count) pregnancy, ///
		  by(bin_number post_nipt)

		qui replace bin_number = bin_number * 25
		foreach var in did_invasive subt termination_clean no_live healthy chroma {
			local ylab = "0(.10)1"
			local yscl = "0 1"
			local ytext = "1"
			if inlist("`var'", "termination_clean", "no_live") {
				local ylab = "0(.1).5"
				local yscl = "0 .5"
				local ytext = ".5"
			}
			if inlist("`var'", "chroma") {
				local ylab = "0(.01).10"
				local yscl = "0 .10"
				local ytext = ".10"
			}

			qui unique bin_number
			local num_bins = r(unique)
			twoway (connect `var' bin_number if post_nipt == 0, m(o) mcolor(green) lcolor(green)) ///
			  (connect `var' bin_number if post_nipt == 1, m(t) mcolor(blue) lcolor(blue)), ///
			  legend(on) name(`var'_nb_`gr') ///
			  xtitle("NT Screening result (estimated risk of chromosomal abnormalities)") ///
			  yscale(r(`yscl')) ytitle("", size(small)) ///
			  ylabel(`ylab', format(%3.2f)) ///
			  xlabel(2 "1/2" 51 "1/51" 200 "1/200" 400 "1/400" 600 "1/600" ///
			    800 "1/800" 1000 "1/1000", format(%7.0fc)) ///
				legend(order(1 "Prior to any NIPT coverage" 2 "NIPT coverage for risk in `cov_risk'") ///
				pos(6) span row(1) col(2)) ///
			  xline(`lines') ///
			  plotregion(margin(zero)) xscale(reverse) ///
			  text(`ytext' 25 "High risk", yaxis(1) place(n) size(vsmall)) ///
			  text(`ytext' 125 "Medium risk", yaxis(1) place(n) size(vsmall)) ///
			  text(`ytext' 600 "Low risk", yaxis(1) place(n) size(vsmall)) 
		}
		reshape wide did_nipt did_invasive subt termination_clean no_live healthy chroma pregnancy tot_preg_bin, ///
		  i(bin_number) j(post_nipt)
		gsort -bin_number 
		export delimited using "$RESULTS/draft_tab_fig/fig_data/shares_`gr'.csv", replace
		cap drop bin_number count_preg tot_preg_bin
		restore
	}
end

program export_figures
	* Risk dist + subt figures
	graph export "${RESULTS}\draft_tab_fig\figures/risk_dist_w_subt.pdf", as(pdf) replace ///
			name(risk_dist_w_subt)

	graph export "${RESULTS}\draft_tab_fig\figures/risk_dist.pdf", as(pdf) replace ///
			name(risk_dist)

	graph export "${RESULTS}\draft_tab_fig\figures/share_subt_logscale.pdf", as(pdf) replace ///
			name(share_subt_logscale)
	
	graph export "${RESULTS}\draft_tab_fig\figures/risk_dist_w_subt_1000.pdf", as(pdf) replace ///
			name(risk_dist_w_subt_1000)
	
	graph export "${RESULTS}\draft_tab_fig\figures/risk_dist_1000.pdf", as(pdf) replace ///
			name(risk_dist_1000)
	
	graph export "${RESULTS}\draft_tab_fig\figures/share_subt_1000_logscale.pdf", as(pdf) replace ///
			name(share_subt_1000_logscale)

	* Key figures
	foreach gr in 51_200 hr_cov 51_1000 1000  {
		graph export "${RESULTS}\draft_tab_fig\figures/did_nipt_o_`gr'.pdf", as(pdf) replace ///
			name(did_nipt_o_`gr')
		foreach var in did_invasive subt termination_clean no_live healthy chroma {
			graph export "${RESULTS}\draft_tab_fig\figures/`var'_nb_`gr'.pdf", as(pdf) replace ///
			  name(`var'_nb_`gr')
		} 
	}
end



* EXECUTE
main
